"""
fuzzy_equal_for_diff.py

Copyright 2018 Andres Riancho

This file is part of w3af, http://w3af.org/ .

w3af is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation version 2 of the License.

w3af is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with w3af; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

"""

from w3af.core.controllers.misc.diff import split_by_sep
from w3af.core.controllers.misc.fuzzy_string_cmp import fuzzy_equal


# Characters whose presence makes is_hash() reject a string: a token that
# contains any one of them is never treated as a hash.
NOT_HASH = set(
    '\t\n\r '         # whitespace
    '!"#$\'()*,./'    # punctuation
    ':;<=>?@'
    '[\\]^`'
    '{|}~'
)


def fuzzy_equal_for_diff(diff_x, diff_y, is_equal_ratio):
    """
    Estimate whether two diff() outputs are fuzzy equal, ignoring any tokens
    that look like hashes.

    fuzzy_equal() is not applied directly to the diff() results because CSRF
    tokens and other randomly generated values were breaking the comparison.
    Instead, each diff is tokenized with split_by_sep(), the hash-like tokens
    are dropped with remove_hashes(), the remaining tokens are re-joined with
    new lines and only then compared.

    :param diff_x: Result of running diff() on responses A and B
    :param diff_y: Result of running diff() on responses B and C
    :param is_equal_ratio: The ratio to use when comparing the responses
                           (0 to 1), passed to fuzzy_equal() as threshold
    :return: True if the two diff() results are identical, or fuzzy equal
             once the hash-like tokens have been removed
    """
    # Identical diffs need no tokenizing nor fuzzy matching
    if diff_x == diff_y:
        return True

    tokens_x, tokens_y = split_by_sep(diff_x), split_by_sep(diff_y)
    filtered_x, filtered_y = remove_hashes(tokens_x), remove_hashes(tokens_y)

    return fuzzy_equal('\n'.join(filtered_x),
                       '\n'.join(filtered_y),
                       threshold=is_equal_ratio)


def remove_hashes(string_list):
    """
    Filter out the entries of string_list which is_hash() flags as hashes.

    :param string_list: List of strings, usually generated by split_by_sep()
    :return: A new list holding, in their original order, the strings that
             do not look like hashes
    """
    return [token for token in string_list if not is_hash(token)]


def is_hash(possible_hash):
    """
    IMPORTANT: This function takes the input of split_by_sep() and can NOT be
               used without it!

    The check is a coarse heuristic: a string is reported as a hash when
    none of its characters belongs to NOT_HASH. That means any string free
    of those characters (the empty string included) is reported as a hash.

    :param possible_hash: A string that may be a hash.
    :return: True if the possible_hash looks like a hash, False as soon as
             one character from NOT_HASH is found in it.
    """
    # A single NOT_HASH character is enough to rule the string out; this
    # keeps other common non-hash strings (base64 encoded data and similar
    # encodings) from being flagged.
    return not any(char in NOT_HASH for char in possible_hash)
